Type Markdown and LaTeX: α2

In [1]:
In [2]:
In [3]:
Out[3]:
array(['s', 'b'], dtype=object)
In [4]:
In [5]:
In [6]:
In [7]:
Out[7]:
DER_mass_MMC DER_mass_transverse_met_lep DER_mass_vis DER_pt_h DER_deltaeta_jet_jet DER_mass_jet_jet DER_prodeta_jet_jet DER_deltar_tau_lep DER_pt_tot DER_sum_pt ... PRI_jet_num PRI_jet_leading_pt PRI_jet_leading_eta PRI_jet_leading_phi PRI_jet_subleading_pt PRI_jet_subleading_eta PRI_jet_subleading_phi PRI_jet_all_pt Weight Label
count 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 ... 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000 250000.000000
mean -49.023079 49.239819 81.181982 57.895962 -708.420675 -601.237051 -709.356603 2.373100 18.917332 158.432217 ... 0.979176 -348.329567 -399.254314 -399.259788 -692.381204 -709.121609 -709.118631 73.064591 1.646767 0.342668
std 406.345647 35.344886 40.828691 63.655682 454.480565 657.972302 453.019877 0.782911 22.273494 115.706115 ... 0.977426 532.962789 489.338286 489.333883 479.875496 453.384624 453.389017 98.015662 1.875103 0.474603
min -999.000000 0.000000 6.329000 0.000000 -999.000000 -999.000000 -999.000000 0.208000 0.000000 46.104000 ... 0.000000 -999.000000 -999.000000 -999.000000 -999.000000 -999.000000 -999.000000 0.000000 0.001502 0.000000
25% 78.100750 19.241000 59.388750 14.068750 -999.000000 -999.000000 -999.000000 1.810000 2.841000 77.550000 ... 0.000000 -999.000000 -999.000000 -999.000000 -999.000000 -999.000000 -999.000000 0.000000 0.018636 0.000000
50% 105.012000 46.524000 73.752000 38.467500 -999.000000 -999.000000 -999.000000 2.491500 12.315500 120.664500 ... 1.000000 38.960000 -1.872000 -2.093000 -999.000000 -999.000000 -999.000000 40.512500 1.156188 0.000000
75% 130.606250 73.598000 92.259000 79.169000 0.490000 83.446000 -4.593000 2.961000 27.591000 200.478250 ... 2.000000 75.349000 0.433000 0.503000 33.703000 -2.457000 -2.275000 109.933750 2.404128 1.000000
max 1192.026000 690.075000 1349.351000 2834.999000 8.503000 4974.979000 16.690000 5.684000 2834.999000 1852.462000 ... 3.000000 1120.573000 4.499000 3.141000 721.456000 4.500000 3.142000 1633.433000 7.822543 1.000000

8 rows × 32 columns

In [8]:
In [443]:
Out[443]:
Text(0.5, 1, 'Correlation matrix')
In [11]:
In [12]:
In [13]:
Out[13]:
RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
             with_scaling=True)
In [14]:
In [15]:
In [16]:
In [17]:
Out[17]:
PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)
In [18]:
In [19]:
Out[19]:
array([0.79941427, 0.07158938])
In [20]:
In [21]:
Out[21]:
array([0.79941427, 0.07158938, 0.02945643, 0.02295639, 0.01351438,
       0.00996207, 0.00880798, 0.0084082 , 0.00619115, 0.00534548,
       0.00445186, 0.00405008])
In [22]:
Out[22]:
[0.7994142682423108,
 0.8710036465167198,
 0.9004600789164949,
 0.9234164705934974,
 0.9369308505878781,
 0.946892924605498,
 0.9557009032955126,
 0.964109108282042,
 0.9703002544228476,
 0.9756457300864789,
 0.9800975889437665,
 0.9841476721518224]
In [23]:
Out[23]:
[<matplotlib.lines.Line2D at 0x7f769315d630>]
In [24]:
Out[24]:
(12, 30)
In [25]:
In [26]:
Out[26]:
DER_mass_MMC DER_mass_transverse_met_lep DER_mass_vis DER_pt_h DER_deltaeta_jet_jet DER_mass_jet_jet DER_prodeta_jet_jet DER_deltar_tau_lep DER_pt_tot DER_sum_pt ... PRI_met_phi PRI_met_sumet PRI_jet_num PRI_jet_leading_pt PRI_jet_leading_eta PRI_jet_leading_phi PRI_jet_subleading_pt PRI_jet_subleading_eta PRI_jet_subleading_phi PRI_jet_all_pt
0 0.995349 -0.038413 0.027727 0.026647 0.010302 0.013589 0.010314 0.019463 0.006051 0.026359 ... 0.000564 0.027486 0.014824 0.016759 0.016302 0.016300 0.010548 0.010305 0.010303 0.023016
1 -0.054560 -0.018467 -0.014592 0.365796 0.145052 0.192638 0.145342 -0.163384 0.167182 0.377842 ... 0.001444 0.340174 0.168647 0.142420 0.129454 0.129445 0.150277 0.145148 0.145121 0.352807
2 -0.017486 0.204188 0.688189 -0.032016 -0.036906 -0.051953 -0.036880 0.198528 -0.001672 0.061692 ... -0.001873 0.018460 -0.039027 -0.034266 -0.034288 -0.034284 -0.037454 -0.036887 -0.036881 -0.043294
3 -0.023935 -0.080422 0.399547 -0.011000 -0.030274 -0.032693 -0.030394 0.076708 -0.029584 0.038241 ... 0.000258 0.048186 -0.036338 -0.028207 -0.029022 -0.029015 -0.030834 -0.030313 -0.030310 -0.034375
4 0.040622 0.067508 -0.244816 0.295481 -0.250106 -0.305083 -0.250444 -0.197506 0.004540 0.018443 ... -0.000262 0.032832 -0.204747 -0.086430 -0.101232 -0.101224 -0.251974 -0.250173 -0.250133 -0.047927
5 0.016219 0.093117 0.185329 -0.067361 -0.021505 -0.040592 -0.021261 0.226532 0.780652 -0.072879 ... -0.002002 0.050763 -0.003538 -0.070937 -0.069324 -0.069312 -0.019187 -0.021407 -0.021411 0.022604
6 0.000684 0.006224 -0.013188 -0.020556 0.007366 0.009416 0.007389 -0.003352 0.066664 0.006039 ... -0.001856 0.012288 -0.002714 -0.023809 -0.024537 -0.024207 0.007891 0.007248 0.007370 0.003812
7 0.017312 0.022180 0.259510 0.137755 0.040645 0.059896 0.040490 0.197480 -0.519610 -0.111481 ... -0.000429 -0.163281 0.023878 0.077593 0.077580 0.077595 0.037895 0.040571 0.040574 -0.025283
8 0.037111 0.223778 -0.157950 -0.119322 0.163851 0.228257 0.163933 -0.032647 -0.075146 -0.010722 ... -0.013782 -0.043541 -0.119344 -0.412197 -0.426591 -0.426564 0.167048 0.163873 0.163839 -0.057361
9 -0.000234 0.004843 -0.002665 -0.000099 -0.001063 -0.002424 -0.001063 0.001844 0.006747 -0.000745 ... -0.033571 -0.006555 0.003600 0.006860 0.007379 0.007285 -0.001193 -0.001070 -0.001080 -0.001630
10 -0.003144 -0.059963 0.009251 0.019069 -0.000016 0.003380 -0.000024 0.017726 -0.002938 0.009200 ... 0.979117 0.004438 -0.013002 -0.025541 -0.027976 -0.028630 0.000532 -0.000025 -0.000160 0.012677
11 0.035328 0.623509 -0.108753 -0.171527 0.051418 0.016196 0.051593 -0.226019 0.127588 -0.197955 ... 0.072189 -0.231311 0.140567 0.236743 0.270129 0.270041 0.041431 0.051427 0.051407 -0.279006

12 rows × 30 columns

In [27]:
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7693d795c0>
In [ ]:

Type Markdown and LaTeX: α2

t-SNE

Type Markdown and LaTeX: α2

In [ ]:
In [98]:
In [109]:
In [163]:
Out[163]:
(250000, 12)
In [164]:
In [165]:
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 250000 samples in 0.285s...
[t-SNE] Computed neighbors for 250000 samples in 138.546s...
[t-SNE] Computed conditional probabilities for sample 1000 / 250000
[t-SNE] Computed conditional probabilities for sample 2000 / 250000
[t-SNE] Computed conditional probabilities for sample 3000 / 250000
[t-SNE] Computed conditional probabilities for sample 4000 / 250000
[t-SNE] Computed conditional probabilities for sample 5000 / 250000
[t-SNE] Computed conditional probabilities for sample 6000 / 250000
[t-SNE] Computed conditional probabilities for sample 7000 / 250000
[t-SNE] Computed conditional probabilities for sample 8000 / 250000
[t-SNE] Computed conditional probabilities for sample 9000 / 250000
[t-SNE] Computed conditional probabilities for sample 10000 / 250000
[t-SNE] Computed conditional probabilities for sample 11000 / 250000
[t-SNE] Computed conditional probabilities for sample 12000 / 250000
[t-SNE] Computed conditional probabilities for sample 13000 / 250000
[t-SNE] Computed conditional probabilities for sample 14000 / 250000
[t-SNE] Computed conditional probabilities for sample 15000 / 250000
[t-SNE] Computed conditional probabilities for sample 16000 / 250000
[t-SNE] Computed conditional probabilities for sample 17000 / 250000
[t-SNE] Computed conditional probabilities for sample 18000 / 250000
[t-SNE] Computed conditional probabilities for sample 19000 / 250000
[t-SNE] Computed conditional probabilities for sample 20000 / 250000
[t-SNE] Computed conditional probabilities for sample 21000 / 250000
[t-SNE] Computed conditional probabilities for sample 22000 / 250000
[t-SNE] Computed conditional probabilities for sample 23000 / 250000
[t-SNE] Computed conditional probabilities for sample 24000 / 250000
[t-SNE] Computed conditional probabilities for sample 25000 / 250000
[t-SNE] Computed conditional probabilities for sample 26000 / 250000
[t-SNE] Computed conditional probabilities for sample 27000 / 250000
[t-SNE] Computed conditional probabilities for sample 28000 / 250000
[t-SNE] Computed conditional probabilities for sample 29000 / 250000
[t-SNE] Computed conditional probabilities for sample 30000 / 250000
[t-SNE] Computed conditional probabilities for sample 31000 / 250000
[t-SNE] Computed conditional probabilities for sample 32000 / 250000
[t-SNE] Computed conditional probabilities for sample 33000 / 250000
[t-SNE] Computed conditional probabilities for sample 34000 / 250000
[t-SNE] Computed conditional probabilities for sample 35000 / 250000
[t-SNE] Computed conditional probabilities for sample 36000 / 250000
[t-SNE] Computed conditional probabilities for sample 37000 / 250000
[t-SNE] Computed conditional probabilities for sample 38000 / 250000
[t-SNE] Computed conditional probabilities for sample 39000 / 250000
[t-SNE] Computed conditional probabilities for sample 40000 / 250000
[t-SNE] Computed conditional probabilities for sample 41000 / 250000
[t-SNE] Computed conditional probabilities for sample 42000 / 250000
[t-SNE] Computed conditional probabilities for sample 43000 / 250000
[t-SNE] Computed conditional probabilities for sample 44000 / 250000
[t-SNE] Computed conditional probabilities for sample 45000 / 250000
[t-SNE] Computed conditional probabilities for sample 46000 / 250000
[t-SNE] Computed conditional probabilities for sample 47000 / 250000
[t-SNE] Computed conditional probabilities for sample 48000 / 250000
[t-SNE] Computed conditional probabilities for sample 49000 / 250000
[t-SNE] Computed conditional probabilities for sample 50000 / 250000
[t-SNE] Computed conditional probabilities for sample 51000 / 250000
[t-SNE] Computed conditional probabilities for sample 52000 / 250000
[t-SNE] Computed conditional probabilities for sample 53000 / 250000
[t-SNE] Computed conditional probabilities for sample 54000 / 250000
[t-SNE] Computed conditional probabilities for sample 55000 / 250000
[t-SNE] Computed conditional probabilities for sample 56000 / 250000
[t-SNE] Computed conditional probabilities for sample 57000 / 250000
[t-SNE] Computed conditional probabilities for sample 58000 / 250000
[t-SNE] Computed conditional probabilities for sample 59000 / 250000
[t-SNE] Computed conditional probabilities for sample 60000 / 250000
[t-SNE] Computed conditional probabilities for sample 61000 / 250000
[t-SNE] Computed conditional probabilities for sample 62000 / 250000
[t-SNE] Computed conditional probabilities for sample 63000 / 250000
[t-SNE] Computed conditional probabilities for sample 64000 / 250000
[t-SNE] Computed conditional probabilities for sample 65000 / 250000
[t-SNE] Computed conditional probabilities for sample 66000 / 250000
[t-SNE] Computed conditional probabilities for sample 67000 / 250000
[t-SNE] Computed conditional probabilities for sample 68000 / 250000
[t-SNE] Computed conditional probabilities for sample 69000 / 250000
[t-SNE] Computed conditional probabilities for sample 70000 / 250000
[t-SNE] Computed conditional probabilities for sample 71000 / 250000
[t-SNE] Computed conditional probabilities for sample 72000 / 250000
[t-SNE] Computed conditional probabilities for sample 73000 / 250000
[t-SNE] Computed conditional probabilities for sample 74000 / 250000
[t-SNE] Computed conditional probabilities for sample 75000 / 250000
[t-SNE] Computed conditional probabilities for sample 76000 / 250000
[t-SNE] Computed conditional probabilities for sample 77000 / 250000
[t-SNE] Computed conditional probabilities for sample 78000 / 250000
[t-SNE] Computed conditional probabilities for sample 79000 / 250000
[t-SNE] Computed conditional probabilities for sample 80000 / 250000
[t-SNE] Computed conditional probabilities for sample 81000 / 250000
[t-SNE] Computed conditional probabilities for sample 82000 / 250000
[t-SNE] Computed conditional probabilities for sample 83000 / 250000
[t-SNE] Computed conditional probabilities for sample 84000 / 250000
[t-SNE] Computed conditional probabilities for sample 85000 / 250000
[t-SNE] Computed conditional probabilities for sample 86000 / 250000
[t-SNE] Computed conditional probabilities for sample 87000 / 250000
[t-SNE] Computed conditional probabilities for sample 88000 / 250000
[t-SNE] Computed conditional probabilities for sample 89000 / 250000
[t-SNE] Computed conditional probabilities for sample 90000 / 250000
[t-SNE] Computed conditional probabilities for sample 91000 / 250000
[t-SNE] Computed conditional probabilities for sample 92000 / 250000
[t-SNE] Computed conditional probabilities for sample 93000 / 250000
[t-SNE] Computed conditional probabilities for sample 94000 / 250000
[t-SNE] Computed conditional probabilities for sample 95000 / 250000
[t-SNE] Computed conditional probabilities for sample 96000 / 250000
[t-SNE] Computed conditional probabilities for sample 97000 / 250000
[t-SNE] Computed conditional probabilities for sample 98000 / 250000
[t-SNE] Computed conditional probabilities for sample 99000 / 250000
[t-SNE] Computed conditional probabilities for sample 100000 / 250000
[t-SNE] Computed conditional probabilities for sample 101000 / 250000
[t-SNE] Computed conditional probabilities for sample 102000 / 250000
[t-SNE] Computed conditional probabilities for sample 103000 / 250000
[t-SNE] Computed conditional probabilities for sample 104000 / 250000
[t-SNE] Computed conditional probabilities for sample 105000 / 250000
[t-SNE] Computed conditional probabilities for sample 106000 / 250000
[t-SNE] Computed conditional probabilities for sample 107000 / 250000
[t-SNE] Computed conditional probabilities for sample 108000 / 250000
[t-SNE] Computed conditional probabilities for sample 109000 / 250000
[t-SNE] Computed conditional probabilities for sample 110000 / 250000
[t-SNE] Computed conditional probabilities for sample 111000 / 250000
[t-SNE] Computed conditional probabilities for sample 112000 / 250000
[t-SNE] Computed conditional probabilities for sample 113000 / 250000
[t-SNE] Computed conditional probabilities for sample 114000 / 250000
[t-SNE] Computed conditional probabilities for sample 115000 / 250000
[t-SNE] Computed conditional probabilities for sample 116000 / 250000
[t-SNE] Computed conditional probabilities for sample 117000 / 250000
[t-SNE] Computed conditional probabilities for sample 118000 / 250000
[t-SNE] Computed conditional probabilities for sample 119000 / 250000
[t-SNE] Computed conditional probabilities for sample 120000 / 250000
[t-SNE] Computed conditional probabilities for sample 121000 / 250000
[t-SNE] Computed conditional probabilities for sample 122000 / 250000
[t-SNE] Computed conditional probabilities for sample 123000 / 250000
[t-SNE] Computed conditional probabilities for sample 124000 / 250000
[t-SNE] Computed conditional probabilities for sample 125000 / 250000
[t-SNE] Computed conditional probabilities for sample 126000 / 250000
[t-SNE] Computed conditional probabilities for sample 127000 / 250000
[t-SNE] Computed conditional probabilities for sample 128000 / 250000
[t-SNE] Computed conditional probabilities for sample 129000 / 250000
[t-SNE] Computed conditional probabilities for sample 130000 / 250000
[t-SNE] Computed conditional probabilities for sample 131000 / 250000
[t-SNE] Computed conditional probabilities for sample 132000 / 250000
[t-SNE] Computed conditional probabilities for sample 133000 / 250000
[t-SNE] Computed conditional probabilities for sample 134000 / 250000
[t-SNE] Computed conditional probabilities for sample 135000 / 250000
[t-SNE] Computed conditional probabilities for sample 136000 / 250000
[t-SNE] Computed conditional probabilities for sample 137000 / 250000
[t-SNE] Computed conditional probabilities for sample 138000 / 250000
[t-SNE] Computed conditional probabilities for sample 139000 / 250000
[t-SNE] Computed conditional probabilities for sample 140000 / 250000
[t-SNE] Computed conditional probabilities for sample 141000 / 250000
[t-SNE] Computed conditional probabilities for sample 142000 / 250000
[t-SNE] Computed conditional probabilities for sample 143000 / 250000
[t-SNE] Computed conditional probabilities for sample 144000 / 250000
[t-SNE] Computed conditional probabilities for sample 145000 / 250000
[t-SNE] Computed conditional probabilities for sample 146000 / 250000
[t-SNE] Computed conditional probabilities for sample 147000 / 250000
[t-SNE] Computed conditional probabilities for sample 148000 / 250000
[t-SNE] Computed conditional probabilities for sample 149000 / 250000
[t-SNE] Computed conditional probabilities for sample 150000 / 250000
[t-SNE] Computed conditional probabilities for sample 151000 / 250000
[t-SNE] Computed conditional probabilities for sample 152000 / 250000
[t-SNE] Computed conditional probabilities for sample 153000 / 250000
[t-SNE] Computed conditional probabilities for sample 154000 / 250000
[t-SNE] Computed conditional probabilities for sample 155000 / 250000
[t-SNE] Computed conditional probabilities for sample 156000 / 250000
[t-SNE] Computed conditional probabilities for sample 157000 / 250000
[t-SNE] Computed conditional probabilities for sample 158000 / 250000
[t-SNE] Computed conditional probabilities for sample 159000 / 250000
[t-SNE] Computed conditional probabilities for sample 160000 / 250000
[t-SNE] Computed conditional probabilities for sample 161000 / 250000
[t-SNE] Computed conditional probabilities for sample 162000 / 250000
[t-SNE] Computed conditional probabilities for sample 163000 / 250000
[t-SNE] Computed conditional probabilities for sample 164000 / 250000
[t-SNE] Computed conditional probabilities for sample 165000 / 250000
[t-SNE] Computed conditional probabilities for sample 166000 / 250000
[t-SNE] Computed conditional probabilities for sample 167000 / 250000
[t-SNE] Computed conditional probabilities for sample 168000 / 250000
[t-SNE] Computed conditional probabilities for sample 169000 / 250000
[t-SNE] Computed conditional probabilities for sample 170000 / 250000
[t-SNE] Computed conditional probabilities for sample 171000 / 250000
[t-SNE] Computed conditional probabilities for sample 172000 / 250000
[t-SNE] Computed conditional probabilities for sample 173000 / 250000
[t-SNE] Computed conditional probabilities for sample 174000 / 250000
[t-SNE] Computed conditional probabilities for sample 175000 / 250000
[t-SNE] Computed conditional probabilities for sample 176000 / 250000
[t-SNE] Computed conditional probabilities for sample 177000 / 250000
[t-SNE] Computed conditional probabilities for sample 178000 / 250000
[t-SNE] Computed conditional probabilities for sample 179000 / 250000
[t-SNE] Computed conditional probabilities for sample 180000 / 250000
[t-SNE] Computed conditional probabilities for sample 181000 / 250000
[t-SNE] Computed conditional probabilities for sample 182000 / 250000
[t-SNE] Computed conditional probabilities for sample 183000 / 250000
[t-SNE] Computed conditional probabilities for sample 184000 / 250000
[t-SNE] Computed conditional probabilities for sample 185000 / 250000
[t-SNE] Computed conditional probabilities for sample 186000 / 250000
[t-SNE] Computed conditional probabilities for sample 187000 / 250000
[t-SNE] Computed conditional probabilities for sample 188000 / 250000
[t-SNE] Computed conditional probabilities for sample 189000 / 250000
[t-SNE] Computed conditional probabilities for sample 190000 / 250000
[t-SNE] Computed conditional probabilities for sample 191000 / 250000
[t-SNE] Computed conditional probabilities for sample 192000 / 250000
[t-SNE] Computed conditional probabilities for sample 193000 / 250000
[t-SNE] Computed conditional probabilities for sample 194000 / 250000
[t-SNE] Computed conditional probabilities for sample 195000 / 250000
[t-SNE] Computed conditional probabilities for sample 196000 / 250000
[t-SNE] Computed conditional probabilities for sample 197000 / 250000
[t-SNE] Computed conditional probabilities for sample 198000 / 250000
[t-SNE] Computed conditional probabilities for sample 199000 / 250000
[t-SNE] Computed conditional probabilities for sample 200000 / 250000
[t-SNE] Computed conditional probabilities for sample 201000 / 250000
[t-SNE] Computed conditional probabilities for sample 202000 / 250000
[t-SNE] Computed conditional probabilities for sample 203000 / 250000
[t-SNE] Computed conditional probabilities for sample 204000 / 250000
[t-SNE] Computed conditional probabilities for sample 205000 / 250000
[t-SNE] Computed conditional probabilities for sample 206000 / 250000
[t-SNE] Computed conditional probabilities for sample 207000 / 250000
[t-SNE] Computed conditional probabilities for sample 208000 / 250000
[t-SNE] Computed conditional probabilities for sample 209000 / 250000
[t-SNE] Computed conditional probabilities for sample 210000 / 250000
[t-SNE] Computed conditional probabilities for sample 211000 / 250000
[t-SNE] Computed conditional probabilities for sample 212000 / 250000
[t-SNE] Computed conditional probabilities for sample 213000 / 250000
[t-SNE] Computed conditional probabilities for sample 214000 / 250000
[t-SNE] Computed conditional probabilities for sample 215000 / 250000
[t-SNE] Computed conditional probabilities for sample 216000 / 250000
[t-SNE] Computed conditional probabilities for sample 217000 / 250000
[t-SNE] Computed conditional probabilities for sample 218000 / 250000
[t-SNE] Computed conditional probabilities for sample 219000 / 250000
[t-SNE] Computed conditional probabilities for sample 220000 / 250000
[t-SNE] Computed conditional probabilities for sample 221000 / 250000
[t-SNE] Computed conditional probabilities for sample 222000 / 250000
[t-SNE] Computed conditional probabilities for sample 223000 / 250000
[t-SNE] Computed conditional probabilities for sample 224000 / 250000
[t-SNE] Computed conditional probabilities for sample 225000 / 250000
[t-SNE] Computed conditional probabilities for sample 226000 / 250000
[t-SNE] Computed conditional probabilities for sample 227000 / 250000
[t-SNE] Computed conditional probabilities for sample 228000 / 250000
[t-SNE] Computed conditional probabilities for sample 229000 / 250000
[t-SNE] Computed conditional probabilities for sample 230000 / 250000
[t-SNE] Computed conditional probabilities for sample 231000 / 250000
[t-SNE] Computed conditional probabilities for sample 232000 / 250000
[t-SNE] Computed conditional probabilities for sample 233000 / 250000
[t-SNE] Computed conditional probabilities for sample 234000 / 250000
[t-SNE] Computed conditional probabilities for sample 235000 / 250000
[t-SNE] Computed conditional probabilities for sample 236000 / 250000
[t-SNE] Computed conditional probabilities for sample 237000 / 250000
[t-SNE] Computed conditional probabilities for sample 238000 / 250000
[t-SNE] Computed conditional probabilities for sample 239000 / 250000
[t-SNE] Computed conditional probabilities for sample 240000 / 250000
[t-SNE] Computed conditional probabilities for sample 241000 / 250000
[t-SNE] Computed conditional probabilities for sample 242000 / 250000
[t-SNE] Computed conditional probabilities for sample 243000 / 250000
[t-SNE] Computed conditional probabilities for sample 244000 / 250000
[t-SNE] Computed conditional probabilities for sample 245000 / 250000
[t-SNE] Computed conditional probabilities for sample 246000 / 250000
[t-SNE] Computed conditional probabilities for sample 247000 / 250000
[t-SNE] Computed conditional probabilities for sample 248000 / 250000
[t-SNE] Computed conditional probabilities for sample 249000 / 250000
[t-SNE] Computed conditional probabilities for sample 250000 / 250000
[t-SNE] Mean sigma: 0.387824
[t-SNE] KL divergence after 50 iterations with early exaggeration: 125.084518
[t-SNE] KL divergence after 250 iterations: 5.860478
t-SNE done! Time elapsed: 6838.500510692596 seconds
In [166]:
Out[166]:
(250000, 3)
In [172]:
In [50]:
In [51]:
In [52]:
In [169]:
In [53]:
In [444]:
Out[444]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7603333320>
In [ ]:
In [99]:
In [177]:
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 250000 samples in 0.282s...
[t-SNE] Computed neighbors for 250000 samples in 139.670s...
[t-SNE] Computed conditional probabilities for sample 1000 / 250000
[t-SNE] Computed conditional probabilities for sample 2000 / 250000
[t-SNE] Computed conditional probabilities for sample 3000 / 250000
[t-SNE] Computed conditional probabilities for sample 4000 / 250000
[t-SNE] Computed conditional probabilities for sample 5000 / 250000
[t-SNE] Computed conditional probabilities for sample 6000 / 250000
[t-SNE] Computed conditional probabilities for sample 7000 / 250000
[t-SNE] Computed conditional probabilities for sample 8000 / 250000
[t-SNE] Computed conditional probabilities for sample 9000 / 250000
[t-SNE] Computed conditional probabilities for sample 10000 / 250000
[t-SNE] Computed conditional probabilities for sample 11000 / 250000
[t-SNE] Computed conditional probabilities for sample 12000 / 250000
[t-SNE] Computed conditional probabilities for sample 13000 / 250000
[t-SNE] Computed conditional probabilities for sample 14000 / 250000
[t-SNE] Computed conditional probabilities for sample 15000 / 250000
[t-SNE] Computed conditional probabilities for sample 16000 / 250000
[t-SNE] Computed conditional probabilities for sample 17000 / 250000
[t-SNE] Computed conditional probabilities for sample 18000 / 250000
[t-SNE] Computed conditional probabilities for sample 19000 / 250000
[t-SNE] Computed conditional probabilities for sample 20000 / 250000
[t-SNE] Computed conditional probabilities for sample 21000 / 250000
[t-SNE] Computed conditional probabilities for sample 22000 / 250000
[t-SNE] Computed conditional probabilities for sample 23000 / 250000
[t-SNE] Computed conditional probabilities for sample 24000 / 250000
[t-SNE] Computed conditional probabilities for sample 25000 / 250000
[t-SNE] Computed conditional probabilities for sample 26000 / 250000
[t-SNE] Computed conditional probabilities for sample 27000 / 250000
[t-SNE] Computed conditional probabilities for sample 28000 / 250000
[t-SNE] Computed conditional probabilities for sample 29000 / 250000
[t-SNE] Computed conditional probabilities for sample 30000 / 250000
[t-SNE] Computed conditional probabilities for sample 31000 / 250000
[t-SNE] Computed conditional probabilities for sample 32000 / 250000
[t-SNE] Computed conditional probabilities for sample 33000 / 250000
[t-SNE] Computed conditional probabilities for sample 34000 / 250000
[t-SNE] Computed conditional probabilities for sample 35000 / 250000
[t-SNE] Computed conditional probabilities for sample 36000 / 250000
[t-SNE] Computed conditional probabilities for sample 37000 / 250000
[t-SNE] Computed conditional probabilities for sample 38000 / 250000
[t-SNE] Computed conditional probabilities for sample 39000 / 250000
[t-SNE] Computed conditional probabilities for sample 40000 / 250000
[t-SNE] Computed conditional probabilities for sample 41000 / 250000
[t-SNE] Computed conditional probabilities for sample 42000 / 250000
[t-SNE] Computed conditional probabilities for sample 43000 / 250000
[t-SNE] Computed conditional probabilities for sample 44000 / 250000
[t-SNE] Computed conditional probabilities for sample 45000 / 250000
[t-SNE] Computed conditional probabilities for sample 46000 / 250000
[t-SNE] Computed conditional probabilities for sample 47000 / 250000
[t-SNE] Computed conditional probabilities for sample 48000 / 250000
[t-SNE] Computed conditional probabilities for sample 49000 / 250000
[t-SNE] Computed conditional probabilities for sample 50000 / 250000
[t-SNE] Computed conditional probabilities for sample 51000 / 250000
[t-SNE] Computed conditional probabilities for sample 52000 / 250000
[t-SNE] Computed conditional probabilities for sample 53000 / 250000
[t-SNE] Computed conditional probabilities for sample 54000 / 250000
[t-SNE] Computed conditional probabilities for sample 55000 / 250000
[t-SNE] Computed conditional probabilities for sample 56000 / 250000
[t-SNE] Computed conditional probabilities for sample 57000 / 250000
[t-SNE] Computed conditional probabilities for sample 58000 / 250000
[t-SNE] Computed conditional probabilities for sample 59000 / 250000
[t-SNE] Computed conditional probabilities for sample 60000 / 250000
[t-SNE] Computed conditional probabilities for sample 61000 / 250000
[t-SNE] Computed conditional probabilities for sample 62000 / 250000
[t-SNE] Computed conditional probabilities for sample 63000 / 250000
[t-SNE] Computed conditional probabilities for sample 64000 / 250000
[t-SNE] Computed conditional probabilities for sample 65000 / 250000
[t-SNE] Computed conditional probabilities for sample 66000 / 250000
[t-SNE] Computed conditional probabilities for sample 67000 / 250000
[t-SNE] Computed conditional probabilities for sample 68000 / 250000
[t-SNE] Computed conditional probabilities for sample 69000 / 250000
[t-SNE] Computed conditional probabilities for sample 70000 / 250000
[t-SNE] Computed conditional probabilities for sample 71000 / 250000
[t-SNE] Computed conditional probabilities for sample 72000 / 250000
[t-SNE] Computed conditional probabilities for sample 73000 / 250000
[t-SNE] Computed conditional probabilities for sample 74000 / 250000
[t-SNE] Computed conditional probabilities for sample 75000 / 250000
[t-SNE] Computed conditional probabilities for sample 76000 / 250000
[t-SNE] Computed conditional probabilities for sample 77000 / 250000
[t-SNE] Computed conditional probabilities for sample 78000 / 250000
[t-SNE] Computed conditional probabilities for sample 79000 / 250000
[t-SNE] Computed conditional probabilities for sample 80000 / 250000
[t-SNE] Computed conditional probabilities for sample 81000 / 250000
[t-SNE] Computed conditional probabilities for sample 82000 / 250000
[t-SNE] Computed conditional probabilities for sample 83000 / 250000
[t-SNE] Computed conditional probabilities for sample 84000 / 250000
[t-SNE] Computed conditional probabilities for sample 85000 / 250000
[t-SNE] Computed conditional probabilities for sample 86000 / 250000
[t-SNE] Computed conditional probabilities for sample 87000 / 250000
[t-SNE] Computed conditional probabilities for sample 88000 / 250000
[t-SNE] Computed conditional probabilities for sample 89000 / 250000
[t-SNE] Computed conditional probabilities for sample 90000 / 250000
[t-SNE] Computed conditional probabilities for sample 91000 / 250000
[t-SNE] Computed conditional probabilities for sample 92000 / 250000
[t-SNE] Computed conditional probabilities for sample 93000 / 250000
[t-SNE] Computed conditional probabilities for sample 94000 / 250000
[t-SNE] Computed conditional probabilities for sample 95000 / 250000
[t-SNE] Computed conditional probabilities for sample 96000 / 250000
[t-SNE] Computed conditional probabilities for sample 97000 / 250000
[t-SNE] Computed conditional probabilities for sample 98000 / 250000
[t-SNE] Computed conditional probabilities for sample 99000 / 250000
[t-SNE] Computed conditional probabilities for sample 100000 / 250000
[t-SNE] Computed conditional probabilities for sample 101000 / 250000
[t-SNE] Computed conditional probabilities for sample 102000 / 250000
[t-SNE] Computed conditional probabilities for sample 103000 / 250000
[t-SNE] Computed conditional probabilities for sample 104000 / 250000
[t-SNE] Computed conditional probabilities for sample 105000 / 250000
[t-SNE] Computed conditional probabilities for sample 106000 / 250000
[t-SNE] Computed conditional probabilities for sample 107000 / 250000
[t-SNE] Computed conditional probabilities for sample 108000 / 250000
[t-SNE] Computed conditional probabilities for sample 109000 / 250000
[t-SNE] Computed conditional probabilities for sample 110000 / 250000
[t-SNE] Computed conditional probabilities for sample 111000 / 250000
[t-SNE] Computed conditional probabilities for sample 112000 / 250000
[t-SNE] Computed conditional probabilities for sample 113000 / 250000
[t-SNE] Computed conditional probabilities for sample 114000 / 250000
[t-SNE] Computed conditional probabilities for sample 115000 / 250000
[t-SNE] Computed conditional probabilities for sample 116000 / 250000
[t-SNE] Computed conditional probabilities for sample 117000 / 250000
[t-SNE] Computed conditional probabilities for sample 118000 / 250000
[t-SNE] Computed conditional probabilities for sample 119000 / 250000
[t-SNE] Computed conditional probabilities for sample 120000 / 250000
[t-SNE] Computed conditional probabilities for sample 121000 / 250000
[t-SNE] Computed conditional probabilities for sample 122000 / 250000
[t-SNE] Computed conditional probabilities for sample 123000 / 250000
[t-SNE] Computed conditional probabilities for sample 124000 / 250000
[t-SNE] Computed conditional probabilities for sample 125000 / 250000
[t-SNE] Computed conditional probabilities for sample 126000 / 250000
[t-SNE] Computed conditional probabilities for sample 127000 / 250000
[t-SNE] Computed conditional probabilities for sample 128000 / 250000
[t-SNE] Computed conditional probabilities for sample 129000 / 250000
[t-SNE] Computed conditional probabilities for sample 130000 / 250000
[t-SNE] Computed conditional probabilities for sample 131000 / 250000
[t-SNE] Computed conditional probabilities for sample 132000 / 250000
[t-SNE] Computed conditional probabilities for sample 133000 / 250000
[t-SNE] Computed conditional probabilities for sample 134000 / 250000
[t-SNE] Computed conditional probabilities for sample 135000 / 250000
[t-SNE] Computed conditional probabilities for sample 136000 / 250000
[t-SNE] Computed conditional probabilities for sample 137000 / 250000
[t-SNE] Computed conditional probabilities for sample 138000 / 250000
[t-SNE] Computed conditional probabilities for sample 139000 / 250000
[t-SNE] Computed conditional probabilities for sample 140000 / 250000
[t-SNE] Computed conditional probabilities for sample 141000 / 250000
[t-SNE] Computed conditional probabilities for sample 142000 / 250000
[t-SNE] Computed conditional probabilities for sample 143000 / 250000
[t-SNE] Computed conditional probabilities for sample 144000 / 250000
[t-SNE] Computed conditional probabilities for sample 145000 / 250000
[t-SNE] Computed conditional probabilities for sample 146000 / 250000
[t-SNE] Computed conditional probabilities for sample 147000 / 250000
[t-SNE] Computed conditional probabilities for sample 148000 / 250000
[t-SNE] Computed conditional probabilities for sample 149000 / 250000
[t-SNE] Computed conditional probabilities for sample 150000 / 250000
[t-SNE] Computed conditional probabilities for sample 151000 / 250000
[t-SNE] Computed conditional probabilities for sample 152000 / 250000
[t-SNE] Computed conditional probabilities for sample 153000 / 250000
[t-SNE] Computed conditional probabilities for sample 154000 / 250000
[t-SNE] Computed conditional probabilities for sample 155000 / 250000
[t-SNE] Computed conditional probabilities for sample 156000 / 250000
[t-SNE] Computed conditional probabilities for sample 157000 / 250000
[t-SNE] Computed conditional probabilities for sample 158000 / 250000
[t-SNE] Computed conditional probabilities for sample 159000 / 250000
[t-SNE] Computed conditional probabilities for sample 160000 / 250000
[t-SNE] Computed conditional probabilities for sample 161000 / 250000
[t-SNE] Computed conditional probabilities for sample 162000 / 250000
[t-SNE] Computed conditional probabilities for sample 163000 / 250000
[t-SNE] Computed conditional probabilities for sample 164000 / 250000
[t-SNE] Computed conditional probabilities for sample 165000 / 250000
[t-SNE] Computed conditional probabilities for sample 166000 / 250000
[t-SNE] Computed conditional probabilities for sample 167000 / 250000
[t-SNE] Computed conditional probabilities for sample 168000 / 250000
[t-SNE] Computed conditional probabilities for sample 169000 / 250000
[t-SNE] Computed conditional probabilities for sample 170000 / 250000
[t-SNE] Computed conditional probabilities for sample 171000 / 250000
[t-SNE] Computed conditional probabilities for sample 172000 / 250000
[t-SNE] Computed conditional probabilities for sample 173000 / 250000
[t-SNE] Computed conditional probabilities for sample 174000 / 250000
[t-SNE] Computed conditional probabilities for sample 175000 / 250000
[t-SNE] Computed conditional probabilities for sample 176000 / 250000
[t-SNE] Computed conditional probabilities for sample 177000 / 250000
[t-SNE] Computed conditional probabilities for sample 178000 / 250000
[t-SNE] Computed conditional probabilities for sample 179000 / 250000
[t-SNE] Computed conditional probabilities for sample 180000 / 250000
[t-SNE] Computed conditional probabilities for sample 181000 / 250000
[t-SNE] Computed conditional probabilities for sample 182000 / 250000
[t-SNE] Computed conditional probabilities for sample 183000 / 250000
[t-SNE] Computed conditional probabilities for sample 184000 / 250000
[t-SNE] Computed conditional probabilities for sample 185000 / 250000
[t-SNE] Computed conditional probabilities for sample 186000 / 250000
[t-SNE] Computed conditional probabilities for sample 187000 / 250000
[t-SNE] Computed conditional probabilities for sample 188000 / 250000
[t-SNE] Computed conditional probabilities for sample 189000 / 250000
[t-SNE] Computed conditional probabilities for sample 190000 / 250000
[t-SNE] Computed conditional probabilities for sample 191000 / 250000
[t-SNE] Computed conditional probabilities for sample 192000 / 250000
[t-SNE] Computed conditional probabilities for sample 193000 / 250000
[t-SNE] Computed conditional probabilities for sample 194000 / 250000
[t-SNE] Computed conditional probabilities for sample 195000 / 250000
[t-SNE] Computed conditional probabilities for sample 196000 / 250000
[t-SNE] Computed conditional probabilities for sample 197000 / 250000
[t-SNE] Computed conditional probabilities for sample 198000 / 250000
[t-SNE] Computed conditional probabilities for sample 199000 / 250000
[t-SNE] Computed conditional probabilities for sample 200000 / 250000
[t-SNE] Computed conditional probabilities for sample 201000 / 250000
[t-SNE] Computed conditional probabilities for sample 202000 / 250000
[t-SNE] Computed conditional probabilities for sample 203000 / 250000
[t-SNE] Computed conditional probabilities for sample 204000 / 250000
[t-SNE] Computed conditional probabilities for sample 205000 / 250000
[t-SNE] Computed conditional probabilities for sample 206000 / 250000
[t-SNE] Computed conditional probabilities for sample 207000 / 250000
[t-SNE] Computed conditional probabilities for sample 208000 / 250000
[t-SNE] Computed conditional probabilities for sample 209000 / 250000
[t-SNE] Computed conditional probabilities for sample 210000 / 250000
[t-SNE] Computed conditional probabilities for sample 211000 / 250000
[t-SNE] Computed conditional probabilities for sample 212000 / 250000
[t-SNE] Computed conditional probabilities for sample 213000 / 250000
[t-SNE] Computed conditional probabilities for sample 214000 / 250000
[t-SNE] Computed conditional probabilities for sample 215000 / 250000
[t-SNE] Computed conditional probabilities for sample 216000 / 250000
[t-SNE] Computed conditional probabilities for sample 217000 / 250000
[t-SNE] Computed conditional probabilities for sample 218000 / 250000
[t-SNE] Computed conditional probabilities for sample 219000 / 250000
[t-SNE] Computed conditional probabilities for sample 220000 / 250000
[t-SNE] Computed conditional probabilities for sample 221000 / 250000
[t-SNE] Computed conditional probabilities for sample 222000 / 250000
[t-SNE] Computed conditional probabilities for sample 223000 / 250000
[t-SNE] Computed conditional probabilities for sample 224000 / 250000
[t-SNE] Computed conditional probabilities for sample 225000 / 250000
[t-SNE] Computed conditional probabilities for sample 226000 / 250000
[t-SNE] Computed conditional probabilities for sample 227000 / 250000
[t-SNE] Computed conditional probabilities for sample 228000 / 250000
[t-SNE] Computed conditional probabilities for sample 229000 / 250000
[t-SNE] Computed conditional probabilities for sample 230000 / 250000
[t-SNE] Computed conditional probabilities for sample 231000 / 250000
[t-SNE] Computed conditional probabilities for sample 232000 / 250000
[t-SNE] Computed conditional probabilities for sample 233000 / 250000
[t-SNE] Computed conditional probabilities for sample 234000 / 250000
[t-SNE] Computed conditional probabilities for sample 235000 / 250000
[t-SNE] Computed conditional probabilities for sample 236000 / 250000
[t-SNE] Computed conditional probabilities for sample 237000 / 250000
[t-SNE] Computed conditional probabilities for sample 238000 / 250000
[t-SNE] Computed conditional probabilities for sample 239000 / 250000
[t-SNE] Computed conditional probabilities for sample 240000 / 250000
[t-SNE] Computed conditional probabilities for sample 241000 / 250000
[t-SNE] Computed conditional probabilities for sample 242000 / 250000
[t-SNE] Computed conditional probabilities for sample 243000 / 250000
[t-SNE] Computed conditional probabilities for sample 244000 / 250000
[t-SNE] Computed conditional probabilities for sample 245000 / 250000
[t-SNE] Computed conditional probabilities for sample 246000 / 250000
[t-SNE] Computed conditional probabilities for sample 247000 / 250000
[t-SNE] Computed conditional probabilities for sample 248000 / 250000
[t-SNE] Computed conditional probabilities for sample 249000 / 250000
[t-SNE] Computed conditional probabilities for sample 250000 / 250000
[t-SNE] Mean sigma: 0.387824
[t-SNE] KL divergence after 50 iterations with early exaggeration: 125.084518
[t-SNE] KL divergence after 250 iterations: 6.187023
t-SNE done! Time elapsed: 1069.5455431938171 seconds
In [178]:
In [179]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:

UMAP

In [ ]:
In [76]:
In [77]:
In [66]:
In [67]:
In [79]:
/home/013729625/dsenv/lib64/python3.6/site-packages/numba/typed_passes.py:271: NumbaPerformanceWarning:


The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "dsenv/lib64/python3.6/site-packages/umap/rp_tree.py", line 135:
@numba.njit(fastmath=True, nogil=True, parallel=True)
def euclidean_random_projection_split(data, indices, rng_state):
^


/home/013729625/dsenv/lib64/python3.6/site-packages/numba/typed_passes.py:271: NumbaPerformanceWarning:


The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "dsenv/lib64/python3.6/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^


Out[79]:
(250000, 2)
In [400]:
In [82]:
In [85]:
In [86]:
/home/013729625/dsenv/lib64/python3.6/site-packages/numba/typed_passes.py:271: NumbaPerformanceWarning:


The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "dsenv/lib64/python3.6/site-packages/umap/nndescent.py", line 47:
    @numba.njit(parallel=True)
    def nn_descent(
    ^


In [87]:
Out[87]:
(250000, 2)
In [403]:
In [ ]:
In [92]:
In [94]:
In [96]:
In [ ]:
In [110]:
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 10000 samples in 0.023s...
[t-SNE] Computed neighbors for 10000 samples in 3.916s...
[t-SNE] Computed conditional probabilities for sample 1000 / 10000
[t-SNE] Computed conditional probabilities for sample 2000 / 10000
[t-SNE] Computed conditional probabilities for sample 3000 / 10000
[t-SNE] Computed conditional probabilities for sample 4000 / 10000
[t-SNE] Computed conditional probabilities for sample 5000 / 10000
[t-SNE] Computed conditional probabilities for sample 6000 / 10000
[t-SNE] Computed conditional probabilities for sample 7000 / 10000
[t-SNE] Computed conditional probabilities for sample 8000 / 10000
[t-SNE] Computed conditional probabilities for sample 9000 / 10000
[t-SNE] Computed conditional probabilities for sample 10000 / 10000
[t-SNE] Mean sigma: 0.818923
[t-SNE] KL divergence after 250 iterations with early exaggeration: 80.969681
[t-SNE] KL divergence after 251 iterations: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000
t-SNE done! Time elapsed: 102.87509083747864 seconds
In [111]:
Out[111]:
array([[ 1.1844845 ,  1.2650347 , -1.3530798 ],
       [ 1.0320665 , -1.0249159 ,  1.1623231 ],
       [-1.8379277 , -1.7901961 , -2.386018  ],
       ...,
       [ 1.4897859 ,  0.77739704, -1.1057904 ],
       [-1.8114654 ,  0.6289672 ,  1.5614039 ],
       [-1.8921157 ,  0.57281023,  1.5513233 ]], dtype=float32)
In [116]:
In [107]:
Out[107]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f773875dd30>
In [ ]:
In [ ]:
In [ ]:
In [ ]:

11/20

In [212]:
Out[212]:
EventId DER_mass_MMC DER_mass_transverse_met_lep DER_mass_vis DER_pt_h DER_deltaeta_jet_jet DER_mass_jet_jet DER_prodeta_jet_jet DER_deltar_tau_lep DER_pt_tot ... PRI_jet_num PRI_jet_leading_pt PRI_jet_leading_eta PRI_jet_leading_phi PRI_jet_subleading_pt PRI_jet_subleading_eta PRI_jet_subleading_phi PRI_jet_all_pt Weight Label
0 100000 138.470 51.655 97.827 27.980 0.91 124.711 2.666 3.064 41.928 ... 2 67.435 2.150 0.444 46.062 1.24 -2.475 113.497 0.002653 1
1 100001 160.937 68.768 103.235 48.146 -999.00 -999.000 -999.000 3.473 2.078 ... 1 46.226 0.725 1.158 -999.000 -999.00 -999.000 46.226 2.233584 0
2 100002 -999.000 162.172 125.953 35.635 -999.00 -999.000 -999.000 3.148 9.336 ... 1 44.251 2.053 -2.028 -999.000 -999.00 -999.000 44.251 2.347389 0
3 100003 143.905 81.417 80.943 0.414 -999.00 -999.000 -999.000 3.310 0.414 ... 0 -999.000 -999.000 -999.000 -999.000 -999.00 -999.000 -0.000 5.446378 0
4 100004 175.864 16.915 134.805 16.405 -999.00 -999.000 -999.000 3.891 16.405 ... 0 -999.000 -999.000 -999.000 -999.000 -999.00 -999.000 0.000 6.245333 0

5 rows × 33 columns

In [127]:
Out[127]:
(array([1.77457e+05, 0.00000e+00, 0.00000e+00, 0.00000e+00, 0.00000e+00,
        0.00000e+00, 0.00000e+00, 0.00000e+00, 7.95200e+03, 2.44800e+04,
        1.23040e+04, 7.50000e+03, 5.26300e+03, 3.69900e+03, 2.65800e+03,
        2.08000e+03, 1.53300e+03, 1.16600e+03, 8.66000e+02, 7.09000e+02,
        5.16000e+02, 3.96000e+02, 3.59000e+02, 2.52000e+02, 1.92000e+02,
        1.62000e+02, 1.04000e+02, 8.30000e+01, 6.50000e+01, 5.00000e+01,
        3.40000e+01, 3.70000e+01, 1.80000e+01, 1.90000e+01, 1.30000e+01,
        6.00000e+00, 6.00000e+00, 6.00000e+00, 4.00000e+00, 4.00000e+00,
        1.00000e+00, 2.00000e+00, 1.00000e+00, 1.00000e+00, 0.00000e+00,
        0.00000e+00, 1.00000e+00, 0.00000e+00, 0.00000e+00, 1.00000e+00]),
 array([-999.     , -879.52042, -760.04084, -640.56126, -521.08168,
        -401.6021 , -282.12252, -162.64294,  -43.16336,   76.31622,
         195.7958 ,  315.27538,  434.75496,  554.23454,  673.71412,
         793.1937 ,  912.67328, 1032.15286, 1151.63244, 1271.11202,
        1390.5916 , 1510.07118, 1629.55076, 1749.03034, 1868.50992,
        1987.9895 , 2107.46908, 2226.94866, 2346.42824, 2465.90782,
        2585.3874 , 2704.86698, 2824.34656, 2943.82614, 3063.30572,
        3182.7853 , 3302.26488, 3421.74446, 3541.22404, 3660.70362,
        3780.1832 , 3899.66278, 4019.14236, 4138.62194, 4258.10152,
        4377.5811 , 4497.06068, 4616.54026, 4736.01984, 4855.49942,
        4974.979  ]),
 <a list of 50 Patch objects>)
In [213]:
In [214]:
In [ ]:
In [215]:
In [216]:
In [217]:
In [218]:
Out[218]:
Index(['EventId', 'DER_mass_MMC', 'DER_mass_transverse_met_lep',
       'DER_mass_vis', 'DER_pt_h', 'DER_deltaeta_jet_jet', 'DER_mass_jet_jet',
       'DER_prodeta_jet_jet', 'DER_deltar_tau_lep', 'DER_pt_tot', 'DER_sum_pt',
       'DER_pt_ratio_lep_tau', 'DER_met_phi_centrality',
       'DER_lep_eta_centrality', 'PRI_tau_pt', 'PRI_tau_eta', 'PRI_tau_phi',
       'PRI_lep_pt', 'PRI_lep_eta', 'PRI_lep_phi', 'PRI_met', 'PRI_met_phi',
       'PRI_met_sumet', 'PRI_jet_num', 'PRI_jet_leading_pt',
       'PRI_jet_leading_eta', 'PRI_jet_leading_phi', 'PRI_jet_subleading_pt',
       'PRI_jet_subleading_eta', 'PRI_jet_subleading_phi', 'PRI_jet_all_pt',
       'Weight', 'Label'],
      dtype='object')
In [219]:
In [220]:
In [221]:
In [222]:
In [223]:
In [224]:
In [225]:
In [226]:
[0.18515646 0.16167186]
[0.24742629 0.10978555]
[0.20681785 0.11292406]
In [447]:
Out[447]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7603302550>
In [448]:
Out[448]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f76047074e0>
In [449]:
Out[449]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7604611cf8>
In [ ]:
In [230]:
In [231]:
[0.18515646 0.16167186 0.13806351]
[0.24742629 0.10978554 0.08655618]
[0.20681785 0.11292405 0.07750242]
In [232]:
In [233]:
In [234]:
In [235]:
[0.53405051 0.14226634]
[0.75516214 0.10120474]
[0.68082688 0.08636026]
In [450]:
Out[450]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7604727cf8>
In [451]:
Out[451]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7604738710>
In [452]:
Out[452]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f76049a4198>
In [239]:
[0.53405051 0.14226634 0.07950811]
[0.75516214 0.10120474 0.03680352]
[0.68082688 0.08636026 0.04351135]
In [ ]:
In [396]:
In [399]:
0.9676285593445852
0.9706432481751432
0.9353877928385876
In [ ]:
In [ ]:

Applying Classification algorithms

Logistic Regression

In [240]:
In [241]:
Out[241]:
Index(['EventId', 'DER_mass_MMC', 'DER_mass_transverse_met_lep',
       'DER_mass_vis', 'DER_pt_h', 'DER_deltar_tau_lep', 'DER_pt_tot',
       'DER_sum_pt', 'DER_pt_ratio_lep_tau', 'DER_met_phi_centrality',
       'PRI_tau_pt', 'PRI_tau_eta', 'PRI_tau_phi', 'PRI_lep_pt', 'PRI_lep_eta',
       'PRI_lep_phi', 'PRI_met', 'PRI_met_phi', 'PRI_met_sumet',
       'PRI_jet_all_pt', 'Weight', 'Label'],
      dtype='object')
In [242]:
In [243]:
In [244]:
In [245]:
In [246]:
/home/013729625/dsenv/lib64/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning:

Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.

Out[246]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)
In [247]:
In [248]:
In [249]:
[[27344  2458]
 [ 4473  5691]]
              precision    recall  f1-score   support

           b       0.86      0.92      0.89     29802
           s       0.70      0.56      0.62     10164

    accuracy                           0.83     39966
   macro avg       0.78      0.74      0.75     39966
weighted avg       0.82      0.83      0.82     39966

In [250]:
/home/013729625/dsenv/lib64/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning:

Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.

[[16647  3301]
 [ 5656  5414]]
              precision    recall  f1-score   support

           b       0.75      0.83      0.79     19948
           s       0.62      0.49      0.55     11070

    accuracy                           0.71     31018
   macro avg       0.68      0.66      0.67     31018
weighted avg       0.70      0.71      0.70     31018

In [251]:
/home/013729625/dsenv/lib64/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning:

Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.

[[12808  3260]
 [ 4293  8657]]
              precision    recall  f1-score   support

           b       0.75      0.80      0.77     16068
           s       0.73      0.67      0.70     12950

    accuracy                           0.74     29018
   macro avg       0.74      0.73      0.73     29018
weighted avg       0.74      0.74      0.74     29018

In [ ]:

Type Markdown and LaTeX: $\alpha^2$

In [ ]:
In [252]:
In [253]:
In [254]:
[[27786  2016]
 [ 3947  6217]]
              precision    recall  f1-score   support

           b       0.88      0.93      0.90     29802
           s       0.76      0.61      0.68     10164

    accuracy                           0.85     39966
   macro avg       0.82      0.77      0.79     39966
weighted avg       0.84      0.85      0.85     39966

In [255]:
[[17618  2330]
 [ 3456  7614]]
              precision    recall  f1-score   support

           b       0.84      0.88      0.86     19948
           s       0.77      0.69      0.72     11070

    accuracy                           0.81     31018
   macro avg       0.80      0.79      0.79     31018
weighted avg       0.81      0.81      0.81     31018

In [256]:
[[14179  1889]
 [ 2408 10542]]
              precision    recall  f1-score   support

           b       0.85      0.88      0.87     16068
           s       0.85      0.81      0.83     12950

    accuracy                           0.85     29018
   macro avg       0.85      0.85      0.85     29018
weighted avg       0.85      0.85      0.85     29018

In [ ]:

XGBoost Classifier

In [ ]:
In [258]:
In [262]:
In [263]:
[[27627  2175]
 [ 3761  6403]]
              precision    recall  f1-score   support

           b       0.88      0.93      0.90     29802
           s       0.75      0.63      0.68     10164

    accuracy                           0.85     39966
   macro avg       0.81      0.78      0.79     39966
weighted avg       0.85      0.85      0.85     39966

In [354]:
[[17533  2415]
 [ 3210  7860]]
              precision    recall  f1-score   support

           b       0.85      0.88      0.86     19948
           s       0.76      0.71      0.74     11070

    accuracy                           0.82     31018
   macro avg       0.81      0.79      0.80     31018
weighted avg       0.82      0.82      0.82     31018

In [355]:
[[14002  2066]
 [ 2238 10712]]
              precision    recall  f1-score   support

           b       0.86      0.87      0.87     16068
           s       0.84      0.83      0.83     12950

    accuracy                           0.85     29018
   macro avg       0.85      0.85      0.85     29018
weighted avg       0.85      0.85      0.85     29018

In [ ]:

t-SNE on grouped data

In [ ]:
In [278]:
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 99913 samples in 0.425s...
[t-SNE] Computed neighbors for 99913 samples in 21.384s...
[t-SNE] Computed conditional probabilities for sample 1000 / 99913
[t-SNE] Computed conditional probabilities for sample 2000 / 99913
[t-SNE] Computed conditional probabilities for sample 3000 / 99913
[t-SNE] Computed conditional probabilities for sample 4000 / 99913
[t-SNE] Computed conditional probabilities for sample 5000 / 99913
[t-SNE] Computed conditional probabilities for sample 6000 / 99913
[t-SNE] Computed conditional probabilities for sample 7000 / 99913
[t-SNE] Computed conditional probabilities for sample 8000 / 99913
[t-SNE] Computed conditional probabilities for sample 9000 / 99913
[t-SNE] Computed conditional probabilities for sample 10000 / 99913
[t-SNE] Computed conditional probabilities for sample 11000 / 99913
[t-SNE] Computed conditional probabilities for sample 12000 / 99913
[t-SNE] Computed conditional probabilities for sample 13000 / 99913
[t-SNE] Computed conditional probabilities for sample 14000 / 99913
[t-SNE] Computed conditional probabilities for sample 15000 / 99913
[t-SNE] Computed conditional probabilities for sample 16000 / 99913
[t-SNE] Computed conditional probabilities for sample 17000 / 99913
[t-SNE] Computed conditional probabilities for sample 18000 / 99913
[t-SNE] Computed conditional probabilities for sample 19000 / 99913
[t-SNE] Computed conditional probabilities for sample 20000 / 99913
[t-SNE] Computed conditional probabilities for sample 21000 / 99913
[t-SNE] Computed conditional probabilities for sample 22000 / 99913
[t-SNE] Computed conditional probabilities for sample 23000 / 99913
[t-SNE] Computed conditional probabilities for sample 24000 / 99913
[t-SNE] Computed conditional probabilities for sample 25000 / 99913
[t-SNE] Computed conditional probabilities for sample 26000 / 99913
[t-SNE] Computed conditional probabilities for sample 27000 / 99913
[t-SNE] Computed conditional probabilities for sample 28000 / 99913
[t-SNE] Computed conditional probabilities for sample 29000 / 99913
[t-SNE] Computed conditional probabilities for sample 30000 / 99913
[t-SNE] Computed conditional probabilities for sample 31000 / 99913
[t-SNE] Computed conditional probabilities for sample 32000 / 99913
[t-SNE] Computed conditional probabilities for sample 33000 / 99913
[t-SNE] Computed conditional probabilities for sample 34000 / 99913
[t-SNE] Computed conditional probabilities for sample 35000 / 99913
[t-SNE] Computed conditional probabilities for sample 36000 / 99913
[t-SNE] Computed conditional probabilities for sample 37000 / 99913
[t-SNE] Computed conditional probabilities for sample 38000 / 99913
[t-SNE] Computed conditional probabilities for sample 39000 / 99913
[t-SNE] Computed conditional probabilities for sample 40000 / 99913
[t-SNE] Computed conditional probabilities for sample 41000 / 99913
[t-SNE] Computed conditional probabilities for sample 42000 / 99913
[t-SNE] Computed conditional probabilities for sample 43000 / 99913
[t-SNE] Computed conditional probabilities for sample 44000 / 99913
[t-SNE] Computed conditional probabilities for sample 45000 / 99913
[t-SNE] Computed conditional probabilities for sample 46000 / 99913
[t-SNE] Computed conditional probabilities for sample 47000 / 99913
[t-SNE] Computed conditional probabilities for sample 48000 / 99913
[t-SNE] Computed conditional probabilities for sample 49000 / 99913
[t-SNE] Computed conditional probabilities for sample 50000 / 99913
[t-SNE] Computed conditional probabilities for sample 51000 / 99913
[t-SNE] Computed conditional probabilities for sample 52000 / 99913
[t-SNE] Computed conditional probabilities for sample 53000 / 99913
[t-SNE] Computed conditional probabilities for sample 54000 / 99913
[t-SNE] Computed conditional probabilities for sample 55000 / 99913
[t-SNE] Computed conditional probabilities for sample 56000 / 99913
[t-SNE] Computed conditional probabilities for sample 57000 / 99913
[t-SNE] Computed conditional probabilities for sample 58000 / 99913
[t-SNE] Computed conditional probabilities for sample 59000 / 99913
[t-SNE] Computed conditional probabilities for sample 60000 / 99913
[t-SNE] Computed conditional probabilities for sample 61000 / 99913
[t-SNE] Computed conditional probabilities for sample 62000 / 99913
[t-SNE] Computed conditional probabilities for sample 63000 / 99913
[t-SNE] Computed conditional probabilities for sample 64000 / 99913
[t-SNE] Computed conditional probabilities for sample 65000 / 99913
[t-SNE] Computed conditional probabilities for sample 66000 / 99913
[t-SNE] Computed conditional probabilities for sample 67000 / 99913
[t-SNE] Computed conditional probabilities for sample 68000 / 99913
[t-SNE] Computed conditional probabilities for sample 69000 / 99913
[t-SNE] Computed conditional probabilities for sample 70000 / 99913
[t-SNE] Computed conditional probabilities for sample 71000 / 99913
[t-SNE] Computed conditional probabilities for sample 72000 / 99913
[t-SNE] Computed conditional probabilities for sample 73000 / 99913
[t-SNE] Computed conditional probabilities for sample 74000 / 99913
[t-SNE] Computed conditional probabilities for sample 75000 / 99913
[t-SNE] Computed conditional probabilities for sample 76000 / 99913
[t-SNE] Computed conditional probabilities for sample 77000 / 99913
[t-SNE] Computed conditional probabilities for sample 78000 / 99913
[t-SNE] Computed conditional probabilities for sample 79000 / 99913
[t-SNE] Computed conditional probabilities for sample 80000 / 99913
[t-SNE] Computed conditional probabilities for sample 81000 / 99913
[t-SNE] Computed conditional probabilities for sample 82000 / 99913
[t-SNE] Computed conditional probabilities for sample 83000 / 99913
[t-SNE] Computed conditional probabilities for sample 84000 / 99913
[t-SNE] Computed conditional probabilities for sample 85000 / 99913
[t-SNE] Computed conditional probabilities for sample 86000 / 99913
[t-SNE] Computed conditional probabilities for sample 87000 / 99913
[t-SNE] Computed conditional probabilities for sample 88000 / 99913
[t-SNE] Computed conditional probabilities for sample 89000 / 99913
[t-SNE] Computed conditional probabilities for sample 90000 / 99913
[t-SNE] Computed conditional probabilities for sample 91000 / 99913
[t-SNE] Computed conditional probabilities for sample 92000 / 99913
[t-SNE] Computed conditional probabilities for sample 93000 / 99913
[t-SNE] Computed conditional probabilities for sample 94000 / 99913
[t-SNE] Computed conditional probabilities for sample 95000 / 99913
[t-SNE] Computed conditional probabilities for sample 96000 / 99913
[t-SNE] Computed conditional probabilities for sample 97000 / 99913
[t-SNE] Computed conditional probabilities for sample 98000 / 99913
[t-SNE] Computed conditional probabilities for sample 99000 / 99913
[t-SNE] Computed conditional probabilities for sample 99913 / 99913
[t-SNE] Mean sigma: 6.305854
[t-SNE] KL divergence after 250 iterations with early exaggeration: 100.063934
[t-SNE] KL divergence after 251 iterations: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000
t-SNE done! Time elapsed: 475.10227131843567 seconds
In [266]:
In [453]:
Out[453]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7604cb5668>
In [276]:
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 77544 samples in 0.145s...
[t-SNE] Computed neighbors for 77544 samples in 26.416s...
[t-SNE] Computed conditional probabilities for sample 1000 / 77544
[t-SNE] Computed conditional probabilities for sample 2000 / 77544
[t-SNE] Computed conditional probabilities for sample 3000 / 77544
[t-SNE] Computed conditional probabilities for sample 4000 / 77544
[t-SNE] Computed conditional probabilities for sample 5000 / 77544
[t-SNE] Computed conditional probabilities for sample 6000 / 77544
[t-SNE] Computed conditional probabilities for sample 7000 / 77544
[t-SNE] Computed conditional probabilities for sample 8000 / 77544
[t-SNE] Computed conditional probabilities for sample 9000 / 77544
[t-SNE] Computed conditional probabilities for sample 10000 / 77544
[t-SNE] Computed conditional probabilities for sample 11000 / 77544
[t-SNE] Computed conditional probabilities for sample 12000 / 77544
[t-SNE] Computed conditional probabilities for sample 13000 / 77544
[t-SNE] Computed conditional probabilities for sample 14000 / 77544
[t-SNE] Computed conditional probabilities for sample 15000 / 77544
[t-SNE] Computed conditional probabilities for sample 16000 / 77544
[t-SNE] Computed conditional probabilities for sample 17000 / 77544
[t-SNE] Computed conditional probabilities for sample 18000 / 77544
[t-SNE] Computed conditional probabilities for sample 19000 / 77544
[t-SNE] Computed conditional probabilities for sample 20000 / 77544
[t-SNE] Computed conditional probabilities for sample 21000 / 77544
[t-SNE] Computed conditional probabilities for sample 22000 / 77544
[t-SNE] Computed conditional probabilities for sample 23000 / 77544
[t-SNE] Computed conditional probabilities for sample 24000 / 77544
[t-SNE] Computed conditional probabilities for sample 25000 / 77544
[t-SNE] Computed conditional probabilities for sample 26000 / 77544
[t-SNE] Computed conditional probabilities for sample 27000 / 77544
[t-SNE] Computed conditional probabilities for sample 28000 / 77544
[t-SNE] Computed conditional probabilities for sample 29000 / 77544
[t-SNE] Computed conditional probabilities for sample 30000 / 77544
[t-SNE] Computed conditional probabilities for sample 31000 / 77544
[t-SNE] Computed conditional probabilities for sample 32000 / 77544
[t-SNE] Computed conditional probabilities for sample 33000 / 77544
[t-SNE] Computed conditional probabilities for sample 34000 / 77544
[t-SNE] Computed conditional probabilities for sample 35000 / 77544
[t-SNE] Computed conditional probabilities for sample 36000 / 77544
[t-SNE] Computed conditional probabilities for sample 37000 / 77544
[t-SNE] Computed conditional probabilities for sample 38000 / 77544
[t-SNE] Computed conditional probabilities for sample 39000 / 77544
[t-SNE] Computed conditional probabilities for sample 40000 / 77544
[t-SNE] Computed conditional probabilities for sample 41000 / 77544
[t-SNE] Computed conditional probabilities for sample 42000 / 77544
[t-SNE] Computed conditional probabilities for sample 43000 / 77544
[t-SNE] Computed conditional probabilities for sample 44000 / 77544
[t-SNE] Computed conditional probabilities for sample 45000 / 77544
[t-SNE] Computed conditional probabilities for sample 46000 / 77544
[t-SNE] Computed conditional probabilities for sample 47000 / 77544
[t-SNE] Computed conditional probabilities for sample 48000 / 77544
[t-SNE] Computed conditional probabilities for sample 49000 / 77544
[t-SNE] Computed conditional probabilities for sample 50000 / 77544
[t-SNE] Computed conditional probabilities for sample 51000 / 77544
[t-SNE] Computed conditional probabilities for sample 52000 / 77544
[t-SNE] Computed conditional probabilities for sample 53000 / 77544
[t-SNE] Computed conditional probabilities for sample 54000 / 77544
[t-SNE] Computed conditional probabilities for sample 55000 / 77544
[t-SNE] Computed conditional probabilities for sample 56000 / 77544
[t-SNE] Computed conditional probabilities for sample 57000 / 77544
[t-SNE] Computed conditional probabilities for sample 58000 / 77544
[t-SNE] Computed conditional probabilities for sample 59000 / 77544
[t-SNE] Computed conditional probabilities for sample 60000 / 77544
[t-SNE] Computed conditional probabilities for sample 61000 / 77544
[t-SNE] Computed conditional probabilities for sample 62000 / 77544
[t-SNE] Computed conditional probabilities for sample 63000 / 77544
[t-SNE] Computed conditional probabilities for sample 64000 / 77544
[t-SNE] Computed conditional probabilities for sample 65000 / 77544
[t-SNE] Computed conditional probabilities for sample 66000 / 77544
[t-SNE] Computed conditional probabilities for sample 67000 / 77544
[t-SNE] Computed conditional probabilities for sample 68000 / 77544
[t-SNE] Computed conditional probabilities for sample 69000 / 77544
[t-SNE] Computed conditional probabilities for sample 70000 / 77544
[t-SNE] Computed conditional probabilities for sample 71000 / 77544
[t-SNE] Computed conditional probabilities for sample 72000 / 77544
[t-SNE] Computed conditional probabilities for sample 73000 / 77544
[t-SNE] Computed conditional probabilities for sample 74000 / 77544
[t-SNE] Computed conditional probabilities for sample 75000 / 77544
[t-SNE] Computed conditional probabilities for sample 76000 / 77544
[t-SNE] Computed conditional probabilities for sample 77000 / 77544
[t-SNE] Computed conditional probabilities for sample 77544 / 77544
[t-SNE] Mean sigma: 9.815991
[t-SNE] KL divergence after 250 iterations with early exaggeration: 103.542763
[t-SNE] KL divergence after 251 iterations: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000
t-SNE done! Time elapsed: 283.6398947238922 seconds
In [454]:
Out[454]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7604fa4e80>
In [272]:
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 72543 samples in 0.105s...
[t-SNE] Computed neighbors for 72543 samples in 30.156s...
[t-SNE] Computed conditional probabilities for sample 1000 / 72543
[t-SNE] Computed conditional probabilities for sample 2000 / 72543
[t-SNE] Computed conditional probabilities for sample 3000 / 72543
[t-SNE] Computed conditional probabilities for sample 4000 / 72543
[t-SNE] Computed conditional probabilities for sample 5000 / 72543
[t-SNE] Computed conditional probabilities for sample 6000 / 72543
[t-SNE] Computed conditional probabilities for sample 7000 / 72543
[t-SNE] Computed conditional probabilities for sample 8000 / 72543
[t-SNE] Computed conditional probabilities for sample 9000 / 72543
[t-SNE] Computed conditional probabilities for sample 10000 / 72543
[t-SNE] Computed conditional probabilities for sample 11000 / 72543
[t-SNE] Computed conditional probabilities for sample 12000 / 72543
[t-SNE] Computed conditional probabilities for sample 13000 / 72543
[t-SNE] Computed conditional probabilities for sample 14000 / 72543
[t-SNE] Computed conditional probabilities for sample 15000 / 72543
[t-SNE] Computed conditional probabilities for sample 16000 / 72543
[t-SNE] Computed conditional probabilities for sample 17000 / 72543
[t-SNE] Computed conditional probabilities for sample 18000 / 72543
[t-SNE] Computed conditional probabilities for sample 19000 / 72543
[t-SNE] Computed conditional probabilities for sample 20000 / 72543
[t-SNE] Computed conditional probabilities for sample 21000 / 72543
[t-SNE] Computed conditional probabilities for sample 22000 / 72543
[t-SNE] Computed conditional probabilities for sample 23000 / 72543
[t-SNE] Computed conditional probabilities for sample 24000 / 72543
[t-SNE] Computed conditional probabilities for sample 25000 / 72543
[t-SNE] Computed conditional probabilities for sample 26000 / 72543
[t-SNE] Computed conditional probabilities for sample 27000 / 72543
[t-SNE] Computed conditional probabilities for sample 28000 / 72543
[t-SNE] Computed conditional probabilities for sample 29000 / 72543
[t-SNE] Computed conditional probabilities for sample 30000 / 72543
[t-SNE] Computed conditional probabilities for sample 31000 / 72543
[t-SNE] Computed conditional probabilities for sample 32000 / 72543
[t-SNE] Computed conditional probabilities for sample 33000 / 72543
[t-SNE] Computed conditional probabilities for sample 34000 / 72543
[t-SNE] Computed conditional probabilities for sample 35000 / 72543
[t-SNE] Computed conditional probabilities for sample 36000 / 72543
[t-SNE] Computed conditional probabilities for sample 37000 / 72543
[t-SNE] Computed conditional probabilities for sample 38000 / 72543
[t-SNE] Computed conditional probabilities for sample 39000 / 72543
[t-SNE] Computed conditional probabilities for sample 40000 / 72543
[t-SNE] Computed conditional probabilities for sample 41000 / 72543
[t-SNE] Computed conditional probabilities for sample 42000 / 72543
[t-SNE] Computed conditional probabilities for sample 43000 / 72543
[t-SNE] Computed conditional probabilities for sample 44000 / 72543
[t-SNE] Computed conditional probabilities for sample 45000 / 72543
[t-SNE] Computed conditional probabilities for sample 46000 / 72543
[t-SNE] Computed conditional probabilities for sample 47000 / 72543
[t-SNE] Computed conditional probabilities for sample 48000 / 72543
[t-SNE] Computed conditional probabilities for sample 49000 / 72543
[t-SNE] Computed conditional probabilities for sample 50000 / 72543
[t-SNE] Computed conditional probabilities for sample 51000 / 72543
[t-SNE] Computed conditional probabilities for sample 52000 / 72543
[t-SNE] Computed conditional probabilities for sample 53000 / 72543
[t-SNE] Computed conditional probabilities for sample 54000 / 72543
[t-SNE] Computed conditional probabilities for sample 55000 / 72543
[t-SNE] Computed conditional probabilities for sample 56000 / 72543
[t-SNE] Computed conditional probabilities for sample 57000 / 72543
[t-SNE] Computed conditional probabilities for sample 58000 / 72543
[t-SNE] Computed conditional probabilities for sample 59000 / 72543
[t-SNE] Computed conditional probabilities for sample 60000 / 72543
[t-SNE] Computed conditional probabilities for sample 61000 / 72543
[t-SNE] Computed conditional probabilities for sample 62000 / 72543
[t-SNE] Computed conditional probabilities for sample 63000 / 72543
[t-SNE] Computed conditional probabilities for sample 64000 / 72543
[t-SNE] Computed conditional probabilities for sample 65000 / 72543
[t-SNE] Computed conditional probabilities for sample 66000 / 72543
[t-SNE] Computed conditional probabilities for sample 67000 / 72543
[t-SNE] Computed conditional probabilities for sample 68000 / 72543
[t-SNE] Computed conditional probabilities for sample 69000 / 72543
[t-SNE] Computed conditional probabilities for sample 70000 / 72543
[t-SNE] Computed conditional probabilities for sample 71000 / 72543
[t-SNE] Computed conditional probabilities for sample 72000 / 72543
[t-SNE] Computed conditional probabilities for sample 72543 / 72543
[t-SNE] Mean sigma: 22.508396
[t-SNE] KL divergence after 250 iterations with early exaggeration: 97.970383
[t-SNE] KL divergence after 251 iterations: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000
t-SNE done! Time elapsed: 231.39938879013062 seconds
In [455]:
Out[455]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f76053142b0>
In [ ]:

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

Type Markdown and LaTeX: α2

In [ ]:
In [ ]:
In [ ]:

Type Markdown and LaTeX: α2

In [345]:
In [343]:
In [344]:
In [346]:
/home/013729625/dsenv/lib64/python3.6/site-packages/xgboost/core.py:587: FutureWarning:

Series.base is deprecated and will be removed in a future version

[16:50:13] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.
Out[346]:
1.529429935891298
In [350]:
/home/013729625/dsenv/lib64/python3.6/site-packages/xgboost/core.py:587: FutureWarning:

Series.base is deprecated and will be removed in a future version

[16:51:54] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.
Out[350]:
0.9447710079389715
In [351]:
/home/013729625/dsenv/lib64/python3.6/site-packages/xgboost/core.py:587: FutureWarning:

Series.base is deprecated and will be removed in a future version

[16:52:10] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.
Out[351]:
0.585320408869103
In [ ]:
In [ ]:

Linear regression

In [347]:
In [348]:
In [349]:
Out[349]:
1.7477650489711272
In [352]:
Out[352]:
1.0627384334604566
In [353]:
Out[353]:
0.6567532378011104
In [ ]:
In [ ]:

Final comparison

In [376]:
Out[376]:
(99913, 19)
In [378]:
Out[378]:
Index(['EventId', 'DER_mass_MMC', 'DER_mass_transverse_met_lep',
       'DER_mass_vis', 'DER_pt_h', 'DER_deltar_tau_lep', 'DER_pt_tot',
       'DER_sum_pt', 'DER_pt_ratio_lep_tau', 'DER_met_phi_centrality',
       'PRI_tau_pt', 'PRI_tau_eta', 'PRI_tau_phi', 'PRI_lep_pt', 'PRI_lep_eta',
       'PRI_lep_phi', 'PRI_met', 'PRI_met_phi', 'PRI_met_sumet',
       'PRI_jet_all_pt', 'Weight', 'Label'],
      dtype='object')
In [384]:
In [388]:
In [416]:
Out[416]:
Index(['DER_mass_MMC', 'DER_mass_transverse_met_lep', 'DER_mass_vis',
       'DER_pt_h', 'DER_deltar_tau_lep', 'DER_pt_tot', 'DER_sum_pt',
       'DER_pt_ratio_lep_tau', 'DER_met_phi_centrality', 'PRI_tau_pt',
       'PRI_tau_eta', 'PRI_tau_phi', 'PRI_lep_pt', 'PRI_lep_eta',
       'PRI_lep_phi', 'PRI_met', 'PRI_met_phi', 'PRI_met_sumet',
       'PRI_jet_all_pt', 'Weight', 'Label'],
      dtype='object')
In [391]:
[[27627  2175]
 [ 3761  6403]]
              precision    recall  f1-score   support

           b       0.88      0.93      0.90     29802
           s       0.75      0.63      0.68     10164

    accuracy                           0.85     39966
   macro avg       0.81      0.78      0.79     39966
weighted avg       0.85      0.85      0.85     39966

In [392]:
/home/013729625/dsenv/lib64/python3.6/site-packages/xgboost/core.py:587: FutureWarning:

Series.base is deprecated and will be removed in a future version

[18:04:53] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.
Out[392]:
1.5294179305501039
In [ ]:
In [ ]:

Apply the XGBoost classifier and the XGBoost regressor to the test data

In [404]:
In [405]:
Out[405]:
EventId DER_mass_MMC DER_mass_transverse_met_lep DER_mass_vis DER_pt_h DER_deltaeta_jet_jet DER_mass_jet_jet DER_prodeta_jet_jet DER_deltar_tau_lep DER_pt_tot ... PRI_met_phi PRI_met_sumet PRI_jet_num PRI_jet_leading_pt PRI_jet_leading_eta PRI_jet_leading_phi PRI_jet_subleading_pt PRI_jet_subleading_eta PRI_jet_subleading_phi PRI_jet_all_pt
0 350000 -999.000 79.589 23.916 3.036 -999.000 -999.000 -999.000 0.903 3.036 ... 2.022 98.556 0 -999.000 -999.000 -999.000 -999.000 -999.000 -999.000 -0.000
1 350001 106.398 67.490 87.949 49.994 -999.000 -999.000 -999.000 2.048 2.679 ... -1.138 176.251 1 47.575 -0.553 -0.849 -999.000 -999.000 -999.000 47.575
2 350002 117.794 56.226 96.358 4.137 -999.000 -999.000 -999.000 2.755 4.137 ... -1.868 111.505 0 -999.000 -999.000 -999.000 -999.000 -999.000 -999.000 0.000
3 350003 135.861 30.604 97.288 9.104 -999.000 -999.000 -999.000 2.811 9.104 ... 1.172 164.707 0 -999.000 -999.000 -999.000 -999.000 -999.000 -999.000 0.000
4 350004 74.159 82.772 58.731 89.646 1.347 536.663 -0.339 1.028 77.213 ... -0.231 869.614 3 254.085 -1.013 -0.334 185.857 0.335 2.587 599.213

5 rows × 31 columns

In [407]:
In [409]:
In [421]:
Out[421]:
Index(['EventId', 'DER_mass_MMC', 'DER_mass_transverse_met_lep',
       'DER_mass_vis', 'DER_pt_h', 'DER_deltaeta_jet_jet', 'DER_mass_jet_jet',
       'DER_prodeta_jet_jet', 'DER_deltar_tau_lep', 'DER_pt_tot', 'DER_sum_pt',
       'DER_pt_ratio_lep_tau', 'DER_met_phi_centrality',
       'DER_lep_eta_centrality', 'PRI_tau_pt', 'PRI_tau_eta', 'PRI_tau_phi',
       'PRI_lep_pt', 'PRI_lep_eta', 'PRI_lep_phi', 'PRI_met', 'PRI_met_phi',
       'PRI_met_sumet', 'PRI_jet_num', 'PRI_jet_leading_pt',
       'PRI_jet_leading_eta', 'PRI_jet_leading_phi', 'PRI_jet_subleading_pt',
       'PRI_jet_subleading_eta', 'PRI_jet_subleading_phi', 'PRI_jet_all_pt'],
      dtype='object')
In [422]:
In [423]:
/home/013729625/dsenv/lib64/python3.6/site-packages/pandas/core/frame.py:4117: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [417]:
/home/013729625/dsenv/lib64/python3.6/site-packages/xgboost/core.py:587: FutureWarning:

Series.base is deprecated and will be removed in a future version

/home/013729625/dsenv/lib64/python3.6/site-packages/xgboost/core.py:588: FutureWarning:

Series.base is deprecated and will be removed in a future version

[18:30:54] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.
Out[417]:
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=300,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)
In [425]:
In [426]:
Out[426]:
(220156,)
In [429]:
[18:39:14] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.
Out[429]:
(220156,)
In [431]:
Out[431]:
(169716,)
In [430]:
[18:40:01] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.
Out[430]:
(160128,)
In [433]:
In [434]:
/home/013729625/dsenv/lib64/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [435]:
/home/013729625/dsenv/lib64/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy